Maverik needs an accurate daily forecast of sales metrics for a new store's first year of sales. Achieving this allows for more effective financial planning and accurate ROI documents.
This is a supervised regression problem where the predictors involve both categorical and numerical variables and the targets are four sales metrics.
This modeling assignment will show four different models used to forecast sales for a new store. We are using ETS, ARIMA, Prophet and XGBoost to forecast sales and will compare results.
We have two dataframes that we are combining based on the ID of the store.
Columns with null values were replaced with 'None'.
Store 21980 has exceptionally high diesel sales. As this is an outlier in our sales trend, we will remove this from the dataframes.
For the Prophet model, we are using one-hot encoding for some variables so that they help in building a better predictive model.
import warnings
# Silence library warnings (pandas/statsmodels emit many during model fitting).
warnings.filterwarnings('ignore')
# Importing dependencies
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder  # NOTE(review): not used in this section — confirm a later cell needs it
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_absolute_error, mean_squared_error
#Loading data
# File paths — absolute and machine-specific; update when running elsewhere.
time_series = "/Users/pablozarate/Documents/MSBA/Maverick/time_series_data_msba.csv"
qual_data = '/Users/pablozarate/Documents/MSBA/Maverick/qualitative_data_msba.csv'
data_sales = pd.read_csv(time_series)
data_stores = pd.read_csv(qual_data)
# Merging the qualitative store data with the daily sales data on store id
df = data_stores.merge(data_sales, how = 'inner', on=['site_id_msba'])
# pd.set_option('display.max_columns', None)
# df.head()
#Checking for null
df.info()
# Six columns contain nulls; a null means the feature is absent from the
# store, so replace it with the literal string 'None'.
cols_to_fill = ['rv_lanes_layout','rv_lanes_stack_type','hi_flow_lanes_layout','hi_flow_lanes_stack_type','hi_flow_rv_lanes_layout','hi_flow_rv_lanes_stack_type']
for col_name in cols_to_fill:
    # Plain assignment instead of fillna(inplace=True) on a column slice:
    # avoids pandas' chained-assignment pitfall and its FutureWarning on
    # pandas >= 2.1, with identical results.
    df[col_name] = df[col_name].fillna("None")
# Dropping the leftover CSV index columns produced by the merge
df = df.drop(['Unnamed: 0_x','Unnamed: 0_y'], axis = 1)
#Converting date to datetime
df['calendar.calendar_day_date'] = pd.to_datetime(df['calendar.calendar_day_date'])
# NOTE(review): set_index drops 'calendar.calendar_day_date' as a column, yet
# later cells (Prophet prep, the decomposition copy, the ARIMA sort) still
# select it by column name — confirm whether this set_index should be removed.
df.set_index('calendar.calendar_day_date', inplace=True)
# Removing site 21980: exceptionally high diesel sales (outlier in the trend)
df = df[df['site_id_msba'] != 21980]
df.info()
# Making a copy of the merged frame for the Prophet pipeline
df_prophet_copy = df.copy()
# One-hot encode the categorical features Prophet will use as extra
# regressors. A single get_dummies(columns=...) call replaces the original
# per-column encode/drop/join loop and yields identical columns in the same
# order (remaining columns first, then dummies in list order).
categorical_columns = ['pizza','rv_lanes','def','rv_lanes_layout','hi_flow_lanes','hi_flow_rv_lanes']
df_prophet_copy = pd.get_dummies(df_prophet_copy, columns=categorical_columns)
# Renaming msba site id to store id for Prophet
df_prophet_copy.rename(columns={'site_id_msba': 'store_id'}, inplace=True)
# Segmenting dataframes by sales metric for Prophet.
# The shared predictor list is defined once so the four per-metric frames
# cannot drift apart (the original repeated the same 30-column list four times).
_prophet_head_cols = ['store_id', 'calendar.calendar_day_date']
_prophet_tail_cols = ['calendar_information.holiday',
    'x1_mile_pop', 'x1_mile_emp', 'x1_mile_income', 'x1_2_mile_pop', 'x1_2_mile_emp', 'x1_2_mile_income',
    'x5_min_pop', 'x5_min_emp', 'x5_min_inc',
    'x7_min_pop', 'x7_min_emp', 'x7_min_inc', 'open_year', 'hi_flow_lanes_fueling_positions',
    'mens_urinal_count', 'mens_toilet_count', 'womens_sink_count',
    'pizza_No', 'pizza_Yes', 'rv_lanes_No', 'rv_lanes_Yes', 'def_No', 'def_Yes',
    'rv_lanes_layout_In-Line', 'rv_lanes_layout_None', 'rv_lanes_layout_Stack',
    'hi_flow_lanes_No', 'hi_flow_lanes_Yes', 'hi_flow_rv_lanes_No', 'hi_flow_rv_lanes_Yes']

def _make_prophet_frame(source_df, target_col):
    """Return a Prophet-ready copy of source_df: the shared predictors plus
    target_col, with the date column renamed to 'ds' and the target to 'y'.
    Column order matches the original hand-written selections."""
    cols = _prophet_head_cols + [target_col] + _prophet_tail_cols
    frame = source_df[cols].copy()
    return frame.rename(columns={'calendar.calendar_day_date': 'ds', target_col: 'y'})

prophet_df_unleaded = _make_prophet_frame(df_prophet_copy, 'unleaded')
prophet_df_diesel = _make_prophet_frame(df_prophet_copy, 'diesel_y')
prophet_df_food_sales = _make_prophet_frame(df_prophet_copy, 'daily_yoy_ndt.total_food_service')
prophet_df_inside_sales = _make_prophet_frame(df_prophet_copy, 'daily_yoy_ndt.total_inside_sales')

# Holidays frame for Prophet: one row per (date, holiday), nulls removed
holidays_df = df_prophet_copy[df_prophet_copy['calendar_information.holiday'].notnull()][['calendar.calendar_day_date', 'calendar_information.holiday']].copy()
holidays_df.rename(columns={'calendar.calendar_day_date': 'ds', 'calendar_information.holiday': 'holiday'}, inplace=True)
holidays_df = holidays_df.drop_duplicates()  # ensure there are no duplicate holiday entries
# Collect the remaining object-dtype (text) columns in df
categorical_features = [column for column in df.columns if df[column].dtype == 'object']
#Encoding Categorical Variables
df = pd.get_dummies(df, columns=categorical_features)
# site_id is an identifier, not a quantity — mark it categorical so it is
# not treated as numeric downstream
df['site_id_msba'] = df['site_id_msba'].astype('object')
#Confirming updates
df.info()
df.head()
df.describe()
We will be modeling sales metrics with ETS, ARIMA, Prophet and XGBoost. We chose these models because each of them is well suited to a time-series forecasting business problem such as this one.
First, we will look at the seasonal decomposition of each sales metric.
Afterwards, we'll split the dataset into testing and training, with the test set consisting of six stores.
We will then use RMSE as a performance metric for each store. We will decide on the best model based on the lowest average RMSE for the sales metrics.
decom_df = df.copy()
# NOTE(review): this raises KeyError if the date column was already consumed
# into the index by the earlier set_index — confirm which cell actually ran.
decom_df.set_index('calendar.calendar_day_date', inplace = True)
import statsmodels.api as sm

def plot_seasonal_decomposition(frame, column, title):
    """Average `column` across all stores per date, run an additive seasonal
    decomposition, and plot the Original/Trend/Seasonal/Residual panels.

    Replaces four copy-pasted sections; also calls plt.show() for every
    metric (the original only showed the first figure explicitly).
    """
    combined_sales = frame.groupby(frame.index)[column].mean()
    # Change model='additive' to 'multiplicative' if the seasonality scales
    # with the level of the series.
    result = sm.tsa.seasonal_decompose(combined_sales, model='additive')
    panels = [(combined_sales, 'Original'), (result.trend, 'Trend'),
              (result.seasonal, 'Seasonal'), (result.resid, 'Residual')]
    plt.figure(figsize=(12, 6))
    plt.suptitle(title)
    for position, (series, label) in enumerate(panels, start=1):
        plt.subplot(4, 1, position)
        plt.plot(series, label=label)
        plt.legend(loc='best')
    plt.tight_layout()
    plt.show()

plot_seasonal_decomposition(decom_df, 'daily_yoy_ndt.total_inside_sales', 'Seasonal Decomposition for Inside Sales')
plot_seasonal_decomposition(decom_df, 'daily_yoy_ndt.total_food_service', 'Seasonal Decomposition for Food Service Sales')
plot_seasonal_decomposition(decom_df, 'diesel_y', 'Seasonal Decomposition for Diesel Sales')
plot_seasonal_decomposition(decom_df, 'unleaded', 'Seasonal Decomposition for Unleaded Sales')
model_df = df.copy()
model_df2 = model_df  # alias (not a copy): both names refer to the same frame
model_df['site_id_msba'].nunique()
# 36 unique site ids — hold out 6 for testing, train on the remaining 30
np.random.seed(1234)
unique_sites = model_df['site_id_msba'].unique()
np.random.shuffle(unique_sites)
random_unique_sites = unique_sites[:6]
test_sites = random_unique_sites.tolist()
print(test_sites)
# Split rows into train/test by store id
stores_for_testing = test_sites
stores_for_training = [site for site in unique_sites if site not in test_sites]
train_data = model_df[model_df['site_id_msba'].isin(stores_for_training)]
test_data = model_df[model_df['site_id_msba'].isin(stores_for_testing)]
# Build one DataFrame per held-out store, keyed 1..6 in order of appearance
unique_site_ids = test_data['site_id_msba'].unique()
test_data_dict = {
    position: test_data[test_data['site_id_msba'] == site]
    for position, site in enumerate(unique_site_ids, start=1)
}
# Convenience names for the six per-store test frames
test_data_1 = test_data_dict[1]
test_data_2 = test_data_dict[2]
test_data_3 = test_data_dict[3]
test_data_4 = test_data_dict[4]
test_data_5 = test_data_dict[5]
test_data_6 = test_data_dict[6]
We will use these scores as a benchmark. We took the training-set average of each sales metric and used it as the prediction. Evaluated against our test data, the performance is as follows:
Inside Sales MAE: 707.2735125491541 RMSE: 967.2567077320317
Food Service MAE: 236.37052076466694 RMSE: 298.75333424739136
Diesel MAE: 889.6733050759848 RMSE: 1003.2789422445452
Unleaded MAE: 906.8842300356473 RMSE: 1345.069038082699
# The four sales metrics being benchmarked
target_columns = [
    'daily_yoy_ndt.total_inside_sales',
    'daily_yoy_ndt.total_food_service',
    'diesel_y',
    'unleaded'
]
# Naive benchmark: predict the training-set mean of each metric for every
# test row, then score against the actuals with MAE and RMSE.
evaluation_results = {}
for metric in target_columns:
    baseline = train_data[metric].mean()
    predictions = [baseline] * len(test_data)
    actuals = test_data[metric]
    evaluation_results[metric] = {
        'MAE': mean_absolute_error(actuals, predictions),
        'RMSE': np.sqrt(mean_squared_error(actuals, predictions)),
    }
# Report the benchmark scores per metric
for metric, scores in evaluation_results.items():
    print(f"Metric: {metric}")
    print(f"Mean Absolute Error (MAE): {scores['MAE']}")
    print(f"Root Mean Squared Error (RMSE): {scores['RMSE']}")
The ETS models are a family of time series models with an underlying state space model consisting of a level component, a trend component (T), a seasonal component (S), and an error term (E).
Our data shows a trend and seasonality based on the seasonal decomposition performed earlier. This is why we decided to choose ETS as one of the models to train and evaluate.
train_ets = train_data.copy()  # independent copy of the training rows for the ETS experiments
Benchmark
MAE: 707.27
RMSE: 967.25
Test 1
MAE: 503.89 RMSE: 631.46
Test 2
MAE: 760.74
RMSE: 862.00
Test 3
MAE: 575.32
RMSE: 726.03
Test 4
MAE: 1554.37
RMSE: 1765.73
Test 5
MAE: 986.74
RMSE: 1119.62
Test 6
MAE: 551.14 RMSE: 692.28
# Keep only the inside-sales target among the four metrics
columns_to_remove = ['daily_yoy_ndt.total_food_service', 'diesel_y', 'unleaded']
train_inside = train_ets.drop(columns=columns_to_remove)
# Average across stores per date so the model sees one observation per day
# (consistent with the diesel/unleaded sections, which already aggregate;
# fitting on 30 interleaved store series made the 365-day seasonality
# meaningless). numeric_only avoids a TypeError on the object-typed
# site id column under pandas >= 2.
train_inside = train_inside.groupby('calendar.calendar_day_date').mean(numeric_only=True)
target1 = 'daily_yoy_ndt.total_inside_sales'
# Additive seasonality with a yearly (365-day) cycle
model_inside = ExponentialSmoothing(train_inside[target1], seasonal='add', seasonal_periods = 365)
model_fit_inside = model_inside.fit()
# Defining a function to perform the forecast, plot, and calculate errors
def forecast_and_evaluate(model, test_data, target, test_number):
    """Forecast len(test_data) steps, plot forecast vs. actuals, and report errors.

    model: fitted statsmodels results object exposing .forecast(steps)
    test_data: DataFrame of actuals for one held-out store
    target: name of the actuals column in test_data
    test_number: label used in the plot title and printouts
    Returns (mae, rmse) so callers can aggregate scores; existing callers
    that ignore the return value are unaffected.
    """
    forecast = model.forecast(steps=len(test_data))
    test_data = test_data.sort_index()  # align plotting/scoring chronologically
    # Plotting the forecast and actual data
    plt.figure(figsize=(12, 6))
    plt.plot(test_data.index, forecast, label=f"Forecast Test {test_number}", color='blue')
    plt.plot(test_data.index, test_data[target], label=f"Actual Test {test_number}", color='green')
    plt.title(f"Forecast vs Actual {target} Test {test_number}")
    plt.xlabel("Date")
    plt.ylabel("Sales")
    plt.legend()
    plt.grid(True)
    plt.show()
    # Calculating MAE and RMSE
    mae = mean_absolute_error(test_data[target], forecast)
    rmse = np.sqrt(mean_squared_error(test_data[target], forecast))
    print(f"Mean Absolute Error (MAE) for Test {test_number}: {mae}")
    print(f"Root Mean Squared Error (RMSE) for Test {test_number}: {rmse}")
    return mae, rmse
test_data_list = [test_data_1, test_data_2, test_data_3, test_data_4, test_data_5, test_data_6]
model = model_fit_inside
# Evaluate the inside-sales ETS model on each held-out store.
# BUG FIX: the loop variable is no longer named 'test_data' — the original
# clobbered the module-level test_data frame, leaving it holding only the
# last store when the ARIMA section later reused it.
for test_number, store_frame in enumerate(test_data_list, start=1):
    forecast_and_evaluate(model, store_frame, target1, test_number)
Benchmark MAE: 236.37 RMSE: 298.75
Test 1
MAE: 155.18 RMSE: 196.79
Test 2
MAE: 366.29
RMSE: 384.48
Test 3
MAE: 187.72
RMSE: 224.85
Test 4
MAE: 423.73 RMSE: 462.54
Test 5
MAE: 222.41
RMSE: 288.66
Test 6
MAE: 211.78 RMSE: 250.74
# Keep only the food-service target among the four metrics
columns_to_remove = ['daily_yoy_ndt.total_inside_sales', 'diesel_y', 'unleaded']
train_food = train_ets.drop(columns=columns_to_remove)
# Average across stores per date so the model sees one observation per day
# (consistent with the diesel/unleaded sections); numeric_only avoids a
# TypeError on the object-typed site id column under pandas >= 2.
train_food = train_food.groupby('calendar.calendar_day_date').mean(numeric_only=True)
target2 = 'daily_yoy_ndt.total_food_service'
# Additive seasonality with a 120-day cycle
model_food = ExponentialSmoothing(train_food[target2], seasonal='add', seasonal_periods = 120)
model_fit_food = model_food.fit()
# Defining a function to perform the forecast, plot, and calculate errors
def forecast_and_evaluate(model, test_data, target, test_number):
    """Forecast len(test_data) steps, plot forecast vs. actuals, and report errors.

    model: fitted statsmodels results object exposing .forecast(steps)
    test_data: DataFrame of actuals for one held-out store
    target: name of the actuals column in test_data
    test_number: label used in the plot title and printouts
    Returns (mae, rmse) so callers can aggregate scores; existing callers
    that ignore the return value are unaffected.
    """
    forecast = model.forecast(steps=len(test_data))
    test_data = test_data.sort_index()  # align plotting/scoring chronologically
    # Plotting the forecast and actual data
    plt.figure(figsize=(12, 6))
    plt.plot(test_data.index, forecast, label=f"Forecast Test {test_number}", color='blue')
    plt.plot(test_data.index, test_data[target], label=f"Actual Test {test_number}", color='green')
    plt.title(f"Forecast vs Actual {target} Test {test_number}")
    plt.xlabel("Date")
    plt.ylabel("Sales")
    plt.legend()
    plt.grid(True)
    plt.show()
    # Calculating MAE and RMSE
    mae = mean_absolute_error(test_data[target], forecast)
    rmse = np.sqrt(mean_squared_error(test_data[target], forecast))
    print(f"Mean Absolute Error (MAE) for Test {test_number}: {mae}")
    print(f"Root Mean Squared Error (RMSE) for Test {test_number}: {rmse}")
    return mae, rmse
test_data_list = [test_data_1, test_data_2, test_data_3, test_data_4, test_data_5, test_data_6]
model = model_fit_food
# Evaluate the food-service ETS model on each held-out store.
# BUG FIX: the loop variable is no longer named 'test_data' — the original
# clobbered the module-level test_data frame used later by the ARIMA section.
for test_number, store_frame in enumerate(test_data_list, start=1):
    forecast_and_evaluate(model, store_frame, target2, test_number)
Benchmark MAE: 889.67 RMSE: 1003.27
Test 1
MAE: 297.63
RMSE: 368.73
Test 2
MAE: 372.98 RMSE: 437.90
Test 3
MAE: 1202.85 RMSE: 1385.61
Test 4
MAE: 992.33 RMSE: 1160.30
Test 5
MAE: 544.97 RMSE: 607.49
Test 6
MAE: 661.04 RMSE: 805.87
# Keep only the diesel target among the four metrics
columns_to_remove = ['daily_yoy_ndt.total_inside_sales','daily_yoy_ndt.total_food_service', 'unleaded']
train_diesel = train_ets.drop(columns=columns_to_remove)
# One observation per day: average across stores. numeric_only=True avoids a
# TypeError under pandas >= 2, where mean() no longer silently skips the
# object-typed site id column.
train_diesel = train_diesel.groupby('calendar.calendar_day_date').mean(numeric_only=True)
target3 = 'diesel_y'
# Additive seasonality with a 120-day cycle
model_diesel= ExponentialSmoothing(train_diesel[target3], seasonal='add', seasonal_periods = 120)
model_fit_diesel = model_diesel.fit()
# Defining a function to perform the forecast, plot, and calculate errors
def forecast_and_evaluate(model, test_data, target, test_number):
    """Forecast len(test_data) steps, plot forecast vs. actuals, and report errors.

    model: fitted statsmodels results object exposing .forecast(steps)
    test_data: DataFrame of actuals for one held-out store
    target: name of the actuals column in test_data
    test_number: label used in the plot title and printouts
    Returns (mae, rmse) so callers can aggregate scores; existing callers
    that ignore the return value are unaffected.
    """
    forecast = model.forecast(steps=len(test_data))
    test_data = test_data.sort_index()  # align plotting/scoring chronologically
    # Plotting the forecast and actual data
    plt.figure(figsize=(12, 6))
    plt.plot(test_data.index, forecast, label=f"Forecast Test {test_number}", color='blue')
    plt.plot(test_data.index, test_data[target], label=f"Actual Test {test_number}", color='green')
    plt.title(f"Forecast vs Actual {target} Test {test_number}")
    plt.xlabel("Date")
    plt.ylabel("Sales")
    plt.legend()
    plt.grid(True)
    plt.show()
    # Calculating MAE and RMSE
    mae = mean_absolute_error(test_data[target], forecast)
    rmse = np.sqrt(mean_squared_error(test_data[target], forecast))
    print(f"Mean Absolute Error (MAE) for Test {test_number}: {mae}")
    print(f"Root Mean Squared Error (RMSE) for Test {test_number}: {rmse}")
    return mae, rmse
test_data_list = [test_data_1, test_data_2, test_data_3, test_data_4, test_data_5, test_data_6]
model = model_fit_diesel
# Evaluate the diesel ETS model on each held-out store.
# BUG FIX: the loop variable is no longer named 'test_data' — the original
# clobbered the module-level test_data frame used later by the ARIMA section.
for test_number, store_frame in enumerate(test_data_list, start=1):
    forecast_and_evaluate(model, store_frame, target3, test_number)
Benchmark MAE: 906.88 RMSE: 1345.06
Test 1
MAE: 779.46
RMSE: 862.54
Test 2
MAE: 3223.10 RMSE: 3459.38
Test 3
MAE: 819.70 RMSE: 994.29
Test 4
MAE: 537.34 RMSE: 635.64
Test 5
MAE: 596.21 RMSE: 694.44
Test 6
MAE: 1021.02
RMSE: 1170.13
# Keep only the unleaded target among the four metrics
columns_to_remove = ['daily_yoy_ndt.total_inside_sales','daily_yoy_ndt.total_food_service','diesel_y' ]
train_unleaded = train_ets.drop(columns=columns_to_remove)
# BUG FIX: the original assigned the grouped frame to the misspelled name
# 'train_unlaeded' and then fit on the ungrouped multi-store data; assign the
# per-date average back to train_unleaded so the model sees one observation
# per day. numeric_only avoids a TypeError on the object-typed site id
# column under pandas >= 2.
train_unleaded = train_unleaded.groupby('calendar.calendar_day_date').mean(numeric_only=True)
target4 = 'unleaded'
# Additive seasonality with a yearly (365-day) cycle
model_unleaded = ExponentialSmoothing(train_unleaded[target4], seasonal='add', seasonal_periods = 365)
model_fit_unleaded = model_unleaded.fit()
# Defining a function to perform the forecast, plot, and calculate errors
def forecast_and_evaluate(model, test_data, target, test_number):
    """Forecast len(test_data) steps, plot forecast vs. actuals, and report errors.

    model: fitted statsmodels results object exposing .forecast(steps)
    test_data: DataFrame of actuals for one held-out store
    target: name of the actuals column in test_data
    test_number: label used in the plot title and printouts
    Returns (mae, rmse) so callers can aggregate scores; existing callers
    that ignore the return value are unaffected.
    """
    forecast = model.forecast(steps=len(test_data))
    test_data = test_data.sort_index()  # align plotting/scoring chronologically
    # Plotting the forecast and actual data
    plt.figure(figsize=(12, 6))
    plt.plot(test_data.index, forecast, label=f"Forecast Test {test_number}", color='blue')
    plt.plot(test_data.index, test_data[target], label=f"Actual Test {test_number}", color='green')
    plt.title(f"Forecast vs Actual {target} Test {test_number}")
    plt.xlabel("Date")
    plt.ylabel("Sales")
    plt.legend()
    plt.grid(True)
    plt.show()
    # Calculating MAE and RMSE
    mae = mean_absolute_error(test_data[target], forecast)
    rmse = np.sqrt(mean_squared_error(test_data[target], forecast))
    print(f"Mean Absolute Error (MAE) for Test {test_number}: {mae}")
    print(f"Root Mean Squared Error (RMSE) for Test {test_number}: {rmse}")
    return mae, rmse
test_data_list = [test_data_1, test_data_2, test_data_3, test_data_4, test_data_5, test_data_6]
model = model_fit_unleaded
# Evaluate the unleaded ETS model on each held-out store.
# BUG FIX: the loop variable is no longer named 'test_data' — the original
# clobbered the module-level test_data frame used later by the ARIMA section.
for test_number, store_frame in enumerate(test_data_list, start=1):
    forecast_and_evaluate(model, store_frame, target4, test_number)
ARIMA is a model used for forecasting future outcomes based on a historical time series. It is based on the statistical concept of serial correlation, where past data points influence future data points. ARIMA is flexible and can handle a wide range of time series data patterns, including trends, seasonality, and autocorrelation which is why we chose ARIMA as one of the models for this project.
import math
from statsmodels.tsa.statespace.sarimax import SARIMAX
import statsmodels.api as sm
# train_data.set_index('calendar.calendar_day_date', inplace = True)
# Sort chronologically so each store's SARIMAX fit sees an ordered series
train_data = train_data.sort_values(by='calendar.calendar_day_date')
# test_data.set_index('calendar.calendar_day_date', inplace = True)
# NOTE(review): confirm test_data still holds all six held-out stores here —
# the earlier ETS evaluation loops used 'test_data' as their loop variable,
# which would leave it pointing at the last store's frame only.
test_data = test_data.sort_values(by='calendar.calendar_day_date')
# Fit one SARIMAX model per store in the training data.
def train_arima_models(data, metric):
    """Return a {store_id: fitted SARIMAX results} dict for `metric`.

    One model is fit per training store on that store's own daily series.
    """
    fitted_models = {}
    for store_id in data['site_id_msba'].unique():
        store_series = data.loc[data['site_id_msba'] == store_id, metric]
        # SARIMAX(2,1,3): AR order 2, one difference, MA order 3
        fitted_models[store_id] = sm.tsa.SARIMAX(store_series, order = (2, 1, 3)).fit()
    return fitted_models
# Average, across every trained store model, the forecast for each unseen test store.
def average_forecast_for_test_stores(models, test_data, metric):
    """For each test store absent from `models`, forecast len(series) steps
    with every trained model and average the forecasts element-wise.

    Returns the list of averaged forecasts, or None when every test store
    already appears in `models`.

    NOTE(review): results follow set iteration order; callers pair them by
    re-deriving the same set expression — confirm that pairing survives any
    refactor.
    """
    unseen_stores = set(test_data['site_id_msba'].unique()) - set(models.keys())
    all_forecasts = []
    for store_id in unseen_stores:
        store_series = test_data.loc[test_data['site_id_msba'] == store_id, metric]
        per_model = [fitted.forecast(steps=len(store_series)) for fitted in models.values()]
        all_forecasts.append(np.mean(per_model, axis=0))
    return all_forecasts if all_forecasts else None
# Fit one SARIMAX model per training store for inside sales
models = train_arima_models(train_data,'daily_yoy_ndt.total_inside_sales')
# Average those models' forecasts for each test store absent from training
test_store_forecasts = average_forecast_for_test_stores(models, test_data,'daily_yoy_ndt.total_inside_sales')
# if test_store_forecasts is not None:
#     for store_id, forecast in zip(set(test_data['site_id_msba'].unique()) - set(models.keys()), test_store_forecasts):
#         print(f'Average Forecast for Store {store_id}:\n{forecast}')
# else:
#     print('No matching models found in the training data for the new test data store IDs.')
# Plot the actual vs predicted values of Inside Sales for each store in the test set.
# NOTE(review): the zip pairing relies on this set expression iterating in the
# same order as inside average_forecast_for_test_stores — confirm before refactoring.
if test_store_forecasts is not None:
    for store_id, forecast in zip(set(test_data['site_id_msba'].unique()) - set(models.keys()), test_store_forecasts):
        store_data = test_data[test_data['site_id_msba'] == store_id]
        actual_data = store_data['daily_yoy_ndt.total_inside_sales']
        # Ordinal time axis (1..n) rather than actual dates
        time_index = range(1, len(actual_data) + 1)
        plt.figure(figsize=(10, 6))
        plt.plot(time_index, actual_data, label='Actual', linestyle='-', color='blue')
        plt.plot(time_index, forecast, label='Predicted', linestyle='-', color='red')
        plt.title(f'Actual vs. Predicted Inside Sales for Store {store_id}')
        plt.xlabel('Time Period')
        plt.ylabel('Sales')
        plt.legend()
        plt.grid(True)
        plt.show()
# Evaluate the prediction error metrics (MSE/RMSE) for Inside Sales
results = {}
if test_store_forecasts is not None:
    for store_id, forecast in zip(set(test_data['site_id_msba'].unique()) - set(models.keys()), test_store_forecasts):
        store_data = test_data[test_data['site_id_msba'] == store_id]
        actual_data = store_data['daily_yoy_ndt.total_inside_sales']
        # Mean Squared Error for this store's predictions
        mse = mean_squared_error(actual_data, forecast)
        # Root Mean Squared Error
        rmse = math.sqrt(mse)
        # NOTE(review): keyed by metric name, so every iteration overwrites the
        # previous store's entry and only the last store's scores survive —
        # verify whether per-store keys were intended.
        results['daily_yoy_ndt.total_inside_sales'] = {
            'Predictions': forecast,
            'MSE': mse,
            'RMSE': rmse
        }
        print(f"RMSE of Inside sales for Store {store_id}: {rmse}")
        print('\n')
# Fit one SARIMAX model per training store for food-service sales
models = train_arima_models(train_data,'daily_yoy_ndt.total_food_service')
# Average those models' forecasts for each test store absent from training
test_store_forecasts = average_forecast_for_test_stores(models, test_data,'daily_yoy_ndt.total_food_service')
# if test_store_forecasts is not None:
#     for store_id, forecast in zip(set(test_data['site_id_msba'].unique()) - set(models.keys()), test_store_forecasts):
#         print(f'Average Forecast for Store {store_id}:\n{forecast}')
# else:
#     print('No matching models found in the training data for the new test data store IDs.')
# Plot the actual vs predicted values of Food Service Sales for each store in the test set.
# NOTE(review): the zip pairing relies on this set expression iterating in the
# same order as inside average_forecast_for_test_stores — confirm before refactoring.
if test_store_forecasts is not None:
    for store_id, forecast in zip(set(test_data['site_id_msba'].unique()) - set(models.keys()), test_store_forecasts):
        store_data = test_data[test_data['site_id_msba'] == store_id]
        actual_data = store_data['daily_yoy_ndt.total_food_service']
        # Ordinal time axis (1..n) rather than actual dates
        time_index = range(1, len(actual_data) + 1)
        plt.figure(figsize=(10, 6))
        plt.plot(time_index, actual_data, label='Actual', linestyle='-', color='blue')
        plt.plot(time_index, forecast, label='Predicted', linestyle='-', color='red')
        plt.title(f'Actual vs. Predicted Food Sales for Store {store_id}')
        plt.xlabel('Time Period')
        plt.ylabel('Sales')
        plt.legend()
        plt.grid(True)
        plt.show()
# Evaluate the prediction error metrics (MSE/RMSE) for Food Service Sales
results = {}
if test_store_forecasts is not None:
    for store_id, forecast in zip(set(test_data['site_id_msba'].unique()) - set(models.keys()), test_store_forecasts):
        store_data = test_data[test_data['site_id_msba'] == store_id]
        actual_data = store_data['daily_yoy_ndt.total_food_service']
        # Mean Squared Error for this store's predictions
        mse = mean_squared_error(actual_data, forecast)
        # Root Mean Squared Error
        rmse = math.sqrt(mse)
        # NOTE(review): keyed by metric name, so every iteration overwrites the
        # previous store's entry and only the last store's scores survive —
        # verify whether per-store keys were intended.
        results['daily_yoy_ndt.total_food_service'] = {
            'Predictions': forecast,
            'MSE': mse,
            'RMSE': rmse
        }
        print(f"RMSE of Food Service sales for Store {store_id}: {rmse}")
        print('\n')
# Fit one SARIMAX model per training store for diesel sales
models = train_arima_models(train_data,'diesel_y')
# Average those models' forecasts for each test store absent from training
test_store_forecasts = average_forecast_for_test_stores(models, test_data,'diesel_y')
# if test_store_forecasts is not None:
#     for store_id, forecast in zip(set(test_data['site_id_msba'].unique()) - set(models.keys()), test_store_forecasts):
#         print(f'Average Forecast for Store {store_id}:\n{forecast}')
# else:
#     print('No matching models found in the training data for the new test data store IDs.')
# Plot the actual vs predicted values of Diesel Sales for each store in the test set.
# NOTE(review): the zip pairing relies on this set expression iterating in the
# same order as inside average_forecast_for_test_stores — confirm before refactoring.
if test_store_forecasts is not None:
    for store_id, forecast in zip(set(test_data['site_id_msba'].unique()) - set(models.keys()), test_store_forecasts):
        store_data = test_data[test_data['site_id_msba'] == store_id]
        actual_data = store_data['diesel_y']
        # Ordinal time axis (1..n) rather than actual dates
        time_index = range(1, len(actual_data) + 1)
        plt.figure(figsize=(10, 6))
        plt.plot(time_index, actual_data, label='Actual', linestyle='-', color='blue')
        plt.plot(time_index, forecast, label='Predicted', linestyle='-', color='red')
        plt.title(f'Actual vs. Predicted Diesel Sales for Store {store_id}')
        plt.xlabel('Time Period')
        plt.ylabel('Sales')
        plt.legend()
        plt.grid(True)
        plt.show()
# Evaluate the prediction error metrics (MSE/RMSE) for Diesel Sales
results = {}
if test_store_forecasts is not None:
    for store_id, forecast in zip(set(test_data['site_id_msba'].unique()) - set(models.keys()), test_store_forecasts):
        store_data = test_data[test_data['site_id_msba'] == store_id]
        actual_data = store_data['diesel_y']
        # Mean Squared Error for this store's predictions
        mse = mean_squared_error(actual_data, forecast)
        # Root Mean Squared Error
        rmse = math.sqrt(mse)
        # NOTE(review): keyed by metric name, so every iteration overwrites the
        # previous store's entry and only the last store's scores survive —
        # verify whether per-store keys were intended.
        results['diesel_y'] = {
            'Predictions': forecast,
            'MSE': mse,
            'RMSE': rmse
        }
        print(f"RMSE of Diesel sales for Store {store_id}: {rmse}")
        print('\n')
# Fit one SARIMAX model per training store for unleaded sales
models = train_arima_models(train_data,'unleaded')
# Average those models' forecasts for each test store absent from training
test_store_forecasts = average_forecast_for_test_stores(models, test_data,'unleaded')
# if test_store_forecasts is not None:
#     for store_id, forecast in zip(set(test_data['site_id_msba'].unique()) - set(models.keys()), test_store_forecasts):
#         print(f'Average Forecast for Store {store_id}:\n{forecast}')
# else:
#     print('No matching models found in the training data for the new test data store IDs.')
# Plot the actual vs predicted values of Unleaded Sales for each store in the test set.
# NOTE(review): the zip pairing relies on this set expression iterating in the
# same order as inside average_forecast_for_test_stores — confirm before refactoring.
if test_store_forecasts is not None:
    for store_id, forecast in zip(set(test_data['site_id_msba'].unique()) - set(models.keys()), test_store_forecasts):
        store_data = test_data[test_data['site_id_msba'] == store_id]
        actual_data = store_data['unleaded']
        # Ordinal time axis (1..n) rather than actual dates
        time_index = range(1, len(actual_data) + 1)
        plt.figure(figsize=(10, 6))
        plt.plot(time_index, actual_data, label='Actual', linestyle='-', color='blue')
        plt.plot(time_index, forecast, label='Predicted', linestyle='-', color='red')
        plt.title(f'Actual vs. Predicted Unleaded Sales for Store {store_id}')
        plt.xlabel('Time Period')
        plt.ylabel('Sales')
        plt.legend()
        plt.grid(True)
        plt.show()
# Evaluate the prediction error metrics (MSE/RMSE) for Unleaded Sales
results = {}
if test_store_forecasts is not None:
    for store_id, forecast in zip(set(test_data['site_id_msba'].unique()) - set(models.keys()), test_store_forecasts):
        store_data = test_data[test_data['site_id_msba'] == store_id]
        actual_data = store_data['unleaded']
        # Mean Squared Error for this store's predictions
        mse = mean_squared_error(actual_data, forecast)
        # Root Mean Squared Error
        rmse = math.sqrt(mse)
        # NOTE(review): keyed by metric name, so every iteration overwrites the
        # previous store's entry and only the last store's scores survive —
        # verify whether per-store keys were intended.
        results['unleaded'] = {
            'Predictions': forecast,
            'MSE': mse,
            'RMSE': rmse
        }
        print(f"RMSE of Unleaded sales for Store {store_id}: {rmse}")
        print('\n')
Prophet is a forecasting tool developed by Meta designed to make predictions for time series data with strong seasonal patterns and missing data points.
This model captures weekly and holiday seasonality for each sales metric and uses feature selection to capture strongly correlated variables to improve model performance.
Each sales metric section shows RMSE and MAE for test stores.
Training and testing the model takes little time — less than five minutes.
Running the cross-validation for the Prophet models takes forty minutes.
# Train/test split for Prophet: hold out six stores, same seed as before.
np.random.seed(1234)
# All unique store ids in the Prophet frame
unique_stores = df_prophet_copy['store_id'].unique()
# Shuffle in place so the six held-out stores are randomly chosen
np.random.shuffle(unique_stores)
train_stores = unique_stores[6:]
test_stores = unique_stores[:6]
print(test_stores.tolist())
# Split one metric's Prophet frame into train/test by the store hold-out.
def split_data(prophet_df):
    """Return (train, test) slices of `prophet_df` using the module-level
    train_stores / test_stores hold-out lists."""
    in_train = prophet_df['store_id'].isin(train_stores)
    in_test = prophet_df['store_id'].isin(test_stores)
    return prophet_df[in_train], prophet_df[in_test]
# Split by Train and Test for each metric for Prophet
# Each call relies on the module-level train_stores/test_stores selections
# and on the prophet_df_* frames prepared earlier in the notebook.
train_unleaded, test_unleaded = split_data(prophet_df_unleaded)
train_diesel, test_diesel = split_data(prophet_df_diesel)
train_food_sales, test_food_sales = split_data(prophet_df_food_sales)
train_inside_sales, test_inside_sales = split_data(prophet_df_inside_sales)
# Training the model
from prophet import Prophet
# Models dictionary: one fitted Prophet model per sales metric.
models = {}
# Define the list of extra regressors you want to add.
extra_regressors = [
    # 'x1_mile_pop', 'x1_mile_emp', 'x1_mile_income', 'x1_2_mile_pop', 'x1_2_mile_emp', 'x1_2_mile_income', 'x5_min_pop', 'x7_min_inc',
    'hi_flow_lanes_fueling_positions', 'mens_urinal_count', 'mens_toilet_count', 'womens_sink_count', 'x5_min_emp', 'x7_min_pop', 'x7_min_emp', 'open_year',
    'pizza_No', 'pizza_Yes', 'rv_lanes_No', 'rv_lanes_Yes', 'def_No', 'def_Yes', 'rv_lanes_layout_In-Line', 'rv_lanes_layout_None', 'rv_lanes_layout_Stack',
    'hi_flow_lanes_No', 'hi_flow_lanes_Yes', 'hi_flow_rv_lanes_No', 'hi_flow_rv_lanes_Yes',
]
# Explicit metric -> training-frame mapping (replaces the fragile
# globals()[f'train_{metric}'] lookup; same frames, same order).
prophet_train_frames = {
    'unleaded': train_unleaded,
    'diesel': train_diesel,
    'food_sales': train_food_sales,
    'inside_sales': train_inside_sales,
}
# Train one model per metric with holiday effects and the extra regressors.
for metric, train_frame in prophet_train_frames.items():
    model = Prophet(holidays=holidays_df)
    for regressor in extra_regressors:
        model.add_regressor(regressor)
    # Fit on 'ds', 'y' plus the regressor columns.
    model.fit(train_frame[['ds', 'y'] + extra_regressors])
    models[metric] = model
# Forecasting for Test Stores
# Dictionary to store forecasts, keyed by (store_id, metric).
forecasts = {}
# Forecast for each metric and each store
for store_id in test_stores:
for metric in ['unleaded', 'diesel', 'food_sales', 'inside_sales']:
model = models[metric]
# Select test data for the current store and metric
# NOTE(review): globals() lookup — also rebinds the module-level name
# `test_data`; an explicit metric->frame dict would be safer.
test_data = globals()[f'test_{metric}']
store_test_data = test_data[test_data['store_id'] == store_id]
# Make sure that your future dataframe includes the regressor values
# This assumes that the test data includes the same extra regressor columns as the training data
future_regressors = store_test_data[extra_regressors].reset_index(drop=True)
# Make future dataframe for the model
future = model.make_future_dataframe(periods=len(store_test_data))
# how='right' keeps exactly (and only) this store's test dates.
future = future.merge(store_test_data[['ds']], on='ds', how='right')
# Add the regressors to the future dataframe
# NOTE(review): concat on axis=1 pairs rows by position only because both
# frames end up with fresh 0..n-1 RangeIndexes (merge result, reset_index)
# — verify the row order of the merge matches store_test_data's order.
future = pd.concat([future, future_regressors], axis=1)
# Forecast
forecast = model.predict(future)
# Store forecast with store ID and metric
forecasts[(store_id, metric)] = forecast[['ds', 'yhat']]
# Creating Evaluation Metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt
# Explicit metric -> test-frame mapping (replaces the fragile
# globals()[f'test_{metric}'] lookups; same frames).
prophet_test_frames = {
    'unleaded': test_unleaded,
    'diesel': test_diesel,
    'food_sales': test_food_sales,
    'inside_sales': test_inside_sales,
}
# Evaluation metrics dictionary, keyed by (store_id, metric).
evaluation_metrics = {}
# Calculate RMSE and MAE for each store and each metric.
for store_id in test_stores:
    for metric in ['unleaded', 'diesel', 'food_sales', 'inside_sales']:
        metric_test = prophet_test_frames[metric]
        # Actual sales for this store.
        actual = metric_test[metric_test['store_id'] == store_id]['y']
        # Predicted sales from the fitted Prophet model.
        predicted = forecasts[(store_id, metric)]['yhat']
        # Truncate both series to the shorter length so positions align.
        min_length = min(len(actual), len(predicted))
        actual, predicted = actual.iloc[:min_length], predicted.iloc[:min_length]
        evaluation_metrics[(store_id, metric)] = {
            'RMSE': sqrt(mean_squared_error(actual, predicted)),
            'MAE': mean_absolute_error(actual, predicted),
        }
# Report ordering for the four sales metrics.
metric_order = {'inside_sales': 1, 'food_sales': 2, 'diesel': 3, 'unleaded': 4}

def custom_metric_sort(store_metric):
    """Sort key: rank a (store_id, metric) pair by the report's metric order."""
    return metric_order[store_metric[1]]
# Evaluation keys, ordered by the metric ranking in metric_order.
sorted_metrics = sorted(evaluation_metrics.keys(), key=custom_metric_sort)

def print_metrics_for_type(metric_type):
    """Print RMSE/MAE for every hold-out store whose entry matches metric_type."""
    for store_id, metric in sorted_metrics:
        if metric != metric_type:
            continue
        metrics = evaluation_metrics[(store_id, metric)]
        print(f'Store ID: {store_id} - Metric: {metric}\n\tRMSE: {metrics["RMSE"]:.2f}\n\tMAE: {metrics["MAE"]:.2f}\n')
def plot_actual_vs_predicted(actual, predicted, metric, store_id):
    """Line-plot the actual vs. predicted series (re-indexed 0..n-1) for one store."""
    pretty_name = metric.capitalize()
    plt.figure(figsize=(10, 5))
    plt.plot(actual.reset_index(drop=True), label='Actual')
    plt.plot(predicted.reset_index(drop=True), label='Predicted', alpha=0.7)
    plt.title(f'Actual vs Predicted - {pretty_name} for Store {store_id}')
    plt.xlabel('Time')
    plt.ylabel(pretty_name)
    plt.legend()
    plt.show()
# WITH REGRESSORS
# One pass per sales metric: print the per-store error metrics, then plot
# actual vs. predicted for each hold-out store. This replaces four
# copy-pasted sections that differed only in the metric name and header;
# output text and ordering are unchanged.
review_test_frames = {
    'inside_sales': test_inside_sales,
    'food_sales': test_food_sales,
    'diesel': test_diesel,
    'unleaded': test_unleaded,
}
review_headers = {
    'inside_sales': 'Inside Sales Metrics:',
    'food_sales': 'Food Sales Metrics:',
    'diesel': 'Diesel Sales Metrics:',
    'unleaded': 'Unleaded Sales Metrics:',
}
for metric, header in review_headers.items():
    # Organized review of metrics by type.
    print(header)
    print_metrics_for_type(metric)
    metric_test = review_test_frames[metric]
    # Plot actual vs. predicted for this metric for each hold-out store.
    for store_id in test_stores:
        actual = metric_test[metric_test['store_id'] == store_id]['y']
        predicted = forecasts[(store_id, metric)]['yhat']
        # Truncate to the shorter series so positions line up.
        min_length = min(len(actual), len(predicted))
        plot_actual_vs_predicted(actual.iloc[:min_length], predicted.iloc[:min_length], metric, store_id)
Cross-Validation for Prophet shows how the model would forecast daily sales for each metric over 365 days. Performance metrics are outlined below.
from prophet.diagnostics import cross_validation, performance_metrics
from prophet import Prophet

def cross_validate_prophet(data, initial, period, horizon, extra_regressors):
    """Fit a Prophet model (holidays + extra regressors) on `data` and return
    the performance-metrics DataFrame from rolling-origin cross-validation.

    `initial`, `period`, and `horizon` are Prophet window strings, e.g. '365 days'.
    """
    model = Prophet(holidays=holidays_df)
    for regressor_name in extra_regressors:
        model.add_regressor(regressor_name)
    # Fit on 'ds', 'y' plus the regressor columns.
    model.fit(data[['ds', 'y'] + extra_regressors])
    # Roll the cutoff forward and score each horizon window.
    cv_results = cross_validation(model, initial=initial, period=period, horizon=horizon)
    return performance_metrics(cv_results)
# Cross-validation windows for Prophet.
initial = '515 days' # training history per fold (~17 months, not two years)
period = '1 days' # spacing between cutoff dates
horizon = '365 days' # forecast horizon
# Explicit metric -> modeling-frame mapping (replaces the fragile
# globals()[f'prophet_df_{metric}'] lookup; same frames).
prophet_cv_frames = {
    'unleaded': prophet_df_unleaded,
    'diesel': prophet_df_diesel,
    'food_sales': prophet_df_food_sales,
    'inside_sales': prophet_df_inside_sales,
}
# Performance metrics per sales metric.
cv_metrics = {}
# List of metrics (kept for downstream cells).
metrics = ['unleaded', 'diesel', 'food_sales', 'inside_sales']
# Loop through each metric to perform cross-validation.
for metric in metrics:
    cv_metrics[metric] = cross_validate_prophet(
        prophet_cv_frames[metric], initial, period, horizon, extra_regressors
    )
# Print the cross-validation performance metrics for each sales metric,
# in the report's order (replaces four copy-pasted print blocks; output
# text is unchanged).
for cv_metric in ['inside_sales', 'food_sales', 'diesel', 'unleaded']:
    print(f"Performance metrics for {cv_metric}:")
    print(cv_metrics[cv_metric])
    print("\n")
# Keep the individually named result frames that later cells may reference.
inside_sales_performance = cv_metrics['inside_sales']
food_sales_performance = cv_metrics['food_sales']
diesel_performance = cv_metrics['diesel']
unleaded_performance = cv_metrics['unleaded']
XGBoost, which stands for Extreme Gradient Boosting, is a scalable, distributed gradient-boosted decision tree (GBDT) machine learning library. It provides parallel tree boosting and is the leading machine learning library for regression, classification, and ranking problems.
The model runs quickly and efficiently for this initial round of exploration and fitting. As we build upon and tune the model, we may need to reevaluate these statistics. As we mentioned earlier in the analysis, we will primarily use RMSE to evaluate model performance.
import xgboost as xgb
from xgboost import plot_importance, plot_tree
plt.style.use('fivethirtyeight')
# Number of distinct stores in the modeling frame (notebook display only;
# the bare expression has no effect outside a notebook cell).
model_df2['site_id_msba'].nunique()
# Reproducibly pick six random stores to hold out for testing.
np.random.seed(1234)
unique_sites = model_df2['site_id_msba'].unique()
np.random.shuffle(unique_sites)
random_unique_sites = unique_sites[:6]
test_sites = random_unique_sites.tolist()
print(test_sites)
#Splitting the dataset into train and test sets based on site id
stores_for_testing = test_sites
stores_for_training = [x for x in unique_sites if x not in test_sites]
train_data = model_df2[model_df2['site_id_msba'].isin(stores_for_training)]
test_data = model_df2[model_df2['site_id_msba'].isin(stores_for_testing)]
# Build one DataFrame per hold-out store, keyed 1..6 for convenient access.
unique_site_ids = test_data['site_id_msba'].unique()
test_data_dict = {
    key: test_data[test_data['site_id_msba'] == site_id]
    for key, site_id in enumerate(unique_site_ids, start=1)
}
# Unpack to individually named frames (test_data_1 ... test_data_6);
# there are exactly six hold-out stores by construction.
test_data_1, test_data_2, test_data_3, test_data_4, test_data_5, test_data_6 = (
    test_data_dict[k] for k in range(1, 7)
)
def create_features(df, label=None):
    """Create calendar features from the DataFrame's DatetimeIndex.

    Note: mutates `df` in place by adding a 'date' column plus the seven
    derived calendar columns.

    Parameters:
        df: DataFrame indexed by a DatetimeIndex.
        label: optional column name; when given, that column is also
            returned as the target series.

    Returns:
        X (the seven calendar-feature columns), or (X, y) when `label` is set.
    """
    df['date'] = df.index
    df['dayofweek'] = df['date'].dt.dayofweek
    df['quarter'] = df['date'].dt.quarter
    df['month'] = df['date'].dt.month
    df['year'] = df['date'].dt.year
    df['dayofyear'] = df['date'].dt.dayofyear
    df['dayofmonth'] = df['date'].dt.day
    # Series.dt.weekofyear was deprecated and removed from pandas;
    # isocalendar().week yields the same ISO week numbers (cast back to int).
    df['weekofyear'] = df['date'].dt.isocalendar().week.astype(int)
    X = df[['dayofweek','quarter','month','year',
           'dayofyear','dayofmonth','weekofyear']]
    if label:
        y = df[label]
        return X, y
    return X
# function for mean absolute percentage error
def mean_absolute_percentage_error(y_true, y_pred):
    """Return MAPE (%) for the given actual/predicted sequences.

    NOTE(review): divides by y_true, so zeros in the actuals produce
    inf/nan silently (warnings are suppressed at the top of the notebook).
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_errors = np.abs((actual - predicted) / actual)
    return relative_errors.mean() * 100
def _fit_and_report_xgb(train_frame, test_frame, target):
    """Fit a calendar-feature XGBoost regressor for one sales target and
    print MSE / RMSE / MAE / MAPE over the hold-out stores.

    Returns the per-target test frame with a 'Prediction' column added.
    """
    # fetching key variable
    train = train_frame[target].to_frame()
    test = test_frame[target].to_frame()
    # adding additional features
    X_train, y_train = create_features(train, label=target)
    X_test, y_test = create_features(test, label=target)
    # train + fit model (early stopping watches the hold-out set)
    reg = xgb.XGBRegressor(n_estimators=1000)
    reg.fit(X_train, y_train,
            eval_set=[(X_train, y_train), (X_test, y_test)],
            early_stopping_rounds=50,
            verbose=False)  # Change verbose to True if you want to see it train
    # Generate predictions and store them in the test DataFrame
    test['Prediction'] = reg.predict(X_test)
    # plots were originally designed to split by date and not store, so they were omitted
    # performance markings are available in aggregate, rather than by store
    mse = mean_squared_error(y_true=test[target], y_pred=test['Prediction'])
    print("Mean Squared Error:\n")
    print(mse)
    print("Root Mean Square Error:\n")
    print(math.sqrt(mse))
    print("Mean Absolute Error:\n")
    print(mean_absolute_error(y_true=test[target], y_pred=test['Prediction']))
    print("Mean Absolute Percentage Error:\n")
    print(mean_absolute_percentage_error(y_true=test[target], y_pred=test['Prediction']))
    return test

# fetching datasets (working copies of the store-level train/test splits)
train_xgb = train_data.copy()
test_xgb = test_data.copy()
# One run per sales target; replaces four copy-pasted sections that differed
# only in the column name. NOTE(review): the original inside-sales run alone
# passed enable_categorical=True — all features here are numeric calendar
# fields, so the flag had no effect and is dropped for consistency.
for target in ['daily_yoy_ndt.total_inside_sales',
               'daily_yoy_ndt.total_food_service',
               'diesel_y',
               'unleaded']:
    test = _fit_and_report_xgb(train_xgb, test_xgb, target)
The average RMSE for each sales metric for the models are as follows:
ETS
Inside Sales: 966.19
Food Service: 301.34
Diesel: 794.32
Unleaded: 1302.74
ARIMA
Inside Sales: 903.78
Food Service: 308.01
Diesel: 1006.06
Unleaded: 1100.31
Prophet
Inside Sales: 834.43
Food Service: 287.61
Diesel: 730.11
Unleaded: 1726.23
XGBoost
Inside Sales: 974.08
Food Service: 279.92
Diesel: 893.82
Unleaded: 1403.42
All models performed well. Prophet produces the lowest RMSE scores for inside sales and diesel; XGBoost edges it out on food service (279.92 vs. 287.61) and ARIMA is lowest on unleaded. As of this assignment submission, we will move forward with Prophet as the overall best performer.
Before we present to Maverik, we will continue to make improvements to our models to see if any perform better than Prophet.
Bhoomika John Pedely
Roman Brock
Pablo Zarate
Biva Sherchan